* Bernd Beber, Michael J. Gilligan, Jenny Guardado, and Sabrina Karim
* Stata code to replicate analysis for "Peacekeeping, International Norms, and Transactional Sex in Monrovia, Liberia"

* load dataset
    use Female18-30, clear

* declare survey design
    svyset psu [pweight=pweight], fpc(fpc1) || _n, fpc(fpc2) strata(stratum) singleunit(certainty)

* construct additional variables
    * transactional sex with UN personnel
    * code subject as having had transactional sex with UN personnel if at least one of the following is true:
        * (1) subject indirectly acknowledges transactional sex with a UN peacekeeper,
        * (2) subject reports transactional sex with UN personnel with any frequency,
        * (3) subject reports that last transactional sex was with UN personnel
    * first, include subjects who indirectly acknowledge transactional sex with a peacekeeper
    * that is, include a subject if she knows someone who has been asked for transactional sex by a peacekeeper, that person actually received something of value in exchange for sex, and the person was the subject herself
    * note that not knowing someone who has been asked by a peacekeeper for sex is not sufficient to rule out transactional sex, since encounters need not be peacekeeper-initiated
    gen tsUN="Yes" if tsUNyou=="Yes" & (tsUNreceived=="Cell phone" | tsUNreceived=="Clothing" | tsUNreceived=="Food" | tsUNreceived=="Help" | tsUNreceived=="Jewelry" | tsUNreceived=="Job" | tsUNreceived=="Medicine" | tsUNreceived=="Money" | tsUNreceived=="Shelter")
    * second, include subjects who report transactional sex with UN personnel with any frequency
    replace tsUN="Yes" if tsUNfrequency=="Daily" | tsUNfrequency=="Weekly" | tsUNfrequency=="Monthly" | tsUNfrequency=="Quarterly" | tsUNfrequency=="Yearly" | tsUNfrequency=="Less than once a year"
    replace tsUN="No" if tsUN=="" & tsUNfrequency=="Not applicable"
    * third, include subjects who report that last transactional sex was with UN personnel
    replace tsUN="Yes" if tsUNlast=="Yes"
    replace tsUN="No" if tsUN=="" & tsUNlast=="Not applicable"

    * transactional sex in general
    * code subject as having had transactional sex if at least one of the following is true:
        * (1) subject directly acknowledges transactional sex,
        * (2) subject reports an age of first transactional sex,
        * (3) subject reports transactional sex with any frequency,
        * (4) subject is coded as having had transactional sex with UN personnel
    * first, include subjects who directly acknowledge transactional sex
    gen tsany=1 if ts=="Yes"
    replace tsany=0 if ts=="No"
    * second, include subjects who report an age of first transactional sex
    replace tsany=1 if regexm(tsfirst, "than") | (tsfirst>"13" & tsfirst<"26")
    replace tsany=0 if tsany==. & tsfirst=="Not applicable"
    * third, include subjects who report transactional sex with any frequency
    replace tsany=1 if tsfrequency=="Daily" | tsfrequency=="Weekly" | tsfrequency=="Monthly" | tsfrequency=="Quarterly" | tsfrequency=="Yearly" | tsfrequency=="Less than once a year"
    replace tsany=0 if tsany==. & tsfrequency=="Not applicable"
    * fourth, include subjects who are coded as having had transactional sex with UN personnel
    replace tsany=1 if tsUN=="Yes"

    * if no transactional sex, no transactional sex with UN personnel either
    replace tsUN="No" if tsUN=="" & tsany==0

    * age at first transaction
    gen tsfirstnum=.
    for numlist 14/25: replace tsfirstnum=X if tsfirst=="X"
    * code imperfectly observed ages at first transaction
    replace tsfirstnum=13 if tsfirst=="Less than 14"
    replace tsfirstnum=26 if tsfirst=="Older than 25"
    * note that 93% of subjects report an age of first transaction between 14 and 25

    * year of first transaction
    gen tsfirstyear = 2012 - age + tsfirstnum

    * cognitive score
    * install code for Mokken scale procedure, if necessary:
    * ssc install msp
    msp test*
    gen score=test1+test2+test3

* save file with additional variables for use in R
    save Female18-30.temp, replace

* Page 2
    sum pweight if tsany==1 & tsUN=="Yes"
    di r(sum)
    local tstotal = `r(sum)'

* Page 11
    tab language

* Page 11
    preserve
        use LDHS_2007_excerpt
        svyset v001 [pw=v005], strata(v022)
        replace s639a=. if s639a==9
        * about 10% of never-married women between 18 and 30 in Monrovia engaged in transactional sex in the previous 12 months
        svy: tab s639a
        svy, subpop(if v026==0 & v012>=18 & v012<=30 & v501==0): tab s639a
    restore

* Page 14
    tab tsvaluelast if tsUNlast=="Yes"
    tab tsvaluelast if tsUNlast=="No"

* Table 1
    tab ts, miss
    tab ts if ts=="Yes" | ts=="No"
    svy: tab ts, miss

* Table 2
    tab tsany tsUN, miss cell
    tab tsUN if tsany==1, miss
    svy: tab tsany tsUN, miss cell

* Table 3
    tab tsfirst, miss
    tab tsfirst if regexm(tsfirst, "than") | (tsfirst>"13" & tsfirst<"26")
    svy: tab tsfirst, miss

* Table 4
    tab tsfrequency, miss
    tab tsfrequency if tsfrequency=="Daily" | tsfrequency=="Less than once a year" | tsfrequency=="Yearly" | tsfrequency=="Monthly" | tsfrequency=="Weekly" | tsfrequency=="Quarterly"
    svy: tab tsfrequency, miss

    tab tsUNfrequency, miss
    tab tsUNfrequency if tsUNfrequency=="Daily" | tsUNfrequency=="Less than once a year" | tsUNfrequency=="Yearly" | tsUNfrequency=="Monthly" | tsUNfrequency=="Weekly" | tsUNfrequency=="Quarterly"
    svy: tab tsUNfrequency, miss

* Table 5
    tab tspayment, miss
    tab tspayment if tspayment!="Don't know" & tspayment!="Refuse" & tspayment!="Not applicable" & tspayment!=""
    svy: tab tspayment, miss

* Table 6
    tab tsvalue, miss
    tab tsvalue if tsvalue!="Don't know" & tsvalue!="Refuse" & tsvalue!="Not applicable" & tsvalue!=""
    svy: tab tsvalue, miss

* Table 7
    tab tsUNlast, miss
    tab tsUNlast if tsUNlast=="Yes" | tsUNlast=="No"
    svy: tab tsUNlast, miss

    tab tsforeign, miss
    tab tsforeign if tsforeign!="Don't know" & tsforeign!="Refuse" & tsforeign!="Not applicable" & tsforeign!=""
    svy: tab tsforeign, miss

* Table 8
    * survival analysis
    * compute number of years at risk of first transaction
    gen atrisk=.
    replace atrisk=age if tsany==0
    replace atrisk=tsfirstnum if tsany==1
    * drop if no information about years at risk
    drop if atrisk==.
    * expand dataset to subject-year observations
    expand atrisk
    * analysis time identifier for years at risk, which is identical to age of subject at a given time
    bysort id_hh: gen analysis_age=_n
    * rename age variable to clarify distinction
    rename age age_at_survey
    * corresponding calendar year
    gen year = 2012 - age_at_survey + analysis_age
    * indicator for first transaction
    bysort id_hh: gen fail = tsany==1 & _n==_N
    * merge data on UNMIL uniformed personnel
    sort year
    merge year using UNMIL
    drop _merge
    sort id_hh analysis_age
    * set pre-2003 personnel levels to zero and rescale
    for varlist total african: replace X=0 if year<2003
    for varlist total african: replace X=X/1000
    * indicator for first Liberian civil war
    gen war1=0
    replace war1=1 if year>1988 & year<1997
    * indicator for second Liberian civil war
    gen war2=0
    replace war2=1 if year>1998 & year<2004

    * the unit of analysis is the subject-year for subjects at risk
    * we cannot include calendar-year fixed effects because troop levels vary at an annual level (and do not vary cross-sectionally)
    * we account for temporal dependence by including a linear calendar-year trend (year) and indicators for war periods (war2 for 1999-2003 and war1 for 1989-1996)

    * discrete-time models
    * with flexible specification for baseline hazard (i.e. analysis-time indicators)
    * analysis-time indicators for yearly ages below the minimum and above the maximum age of first transaction completely determine non-failure, so those variables and cases are dropped automatically
    * manually set omitted category in fixed effects specification, since it needs to be one that does not completely determine non-failure
    char analysis_age[omit] 13
    xi: logit fail total war2 war1 score year i.analysis_age, or
    xi: logit fail african war2 war1 score year i.analysis_age, or

    * continuous-time models
    * ancillary parameter (p>1) suggests hazard that is increasing in analysis age
    stset analysis_age, failure(fail) id(id_hh)
    xi: streg total war2 war1 score year, dist(wei) nolog
    xi: streg african war2 war1 score year, dist(wei) nolog

* Page 4
    * implications of most conservative point estimate
    xi: logit fail total war2 war1 score year i.analysis_age, or
    * comparison of cumulative predicted probabilities with and without UNMIL
    local N = _N
    local Nl = `N' + 1
    local Nm = `N' + 25
    local Nn = `N' + 26
    local Nu = `N' + 50
    preserve
        set obs `Nu'
        gen _Ianalysis__13=.
        replace war1=0 in `Nl'/`Nu'
        replace war2=0 in `Nl'/`Nu'
        replace score=2 in `Nl'/`Nu'
        replace year=2008 in `Nl'/`Nu'
        replace total=0 in `Nl'/`Nm'
        replace total=11.85 in `Nn'/`Nu'
        local j = 1
        forvalues i = `Nl'/`Nm' {
            replace _Ianalysis__`j' = 1 in `i'
            local j = `j' + 1
        }
        local j = 1
        forvalues i = `Nn'/`Nu' {
            replace _Ianalysis__`j' = 1 in `i'
            local j = `j' + 1
        }
        foreach i of varlist _Ianalysis__* {
            replace `i' = 0 if `i'==. in `Nl'/`Nu'
        }
        predict pred in `Nl'/`Nu'
        gen prob = 1 - pred
        local probNone = 1
        local probUN = 1
        forvalues i = `Nl'/`Nm' {
            if(prob[`i']!=.) local probNone = `probNone' * prob[`i']
        }
        forvalues i = `Nn'/`Nu' {
            if(prob[`i']!=.) local probUN = `probUN' * prob[`i']
        }
        di `probNone'
        di `probUN'
        di `probNone'/`probUN'
    restore
    * estimated total effect of UNMIL
    preserve
        predict pred1
        gen exp1 = pweight * pred1
        replace total = 0
        predict pred2
        gen exp2 = pweight * pred2
        sum exp1
        local exp1 = `r(sum)'
        sum exp2
        local exp2 = `r(sum)'
        di `exp1' - `exp2'
        di (`exp1' - `exp2') / `tstotal'
    restore

* Figure 2
    * plot predicted changes in "survival" for median case (women with median cognitive scores in non-war year 2008)
    xi: streg total war2 war1 score year, dist(wei) nolog
    preserve
        tempfile stcurv1
        keep if analysis_age>13 & analysis_age<27
        stcurv, survival at(total=0 war2=0 war1=0 score=2 year=2008) at(total=11.85 war2=0 war1=0 score=2 year=2008) at(total=16.1 war2=0 war1=0 score=2 year=2008) yscale(range(0 1)) ylab(0(.2)1, nogrid) ytitle("Share never engaged in transactional sex") xlab(14(4)26) xtitle("Analysis age") title("") legend(subtitle("Total UNMIL uniformed personnel:", tstyle(body)) rows(1) order(1 "0" 2 "11,850 (actual in 2008)" 3 "16,100 (max)") region(lcolor(white)) ring(0) position(6)) scheme(s2mono) graphregion(color(white)) bgcolor(white) outfile(`stcurv1')
        use `stcurv1', clear
        list
    restore
    xi: streg african war2 war1 score year, dist(wei) nolog
    preserve
        tempfile stcurv2
        keep if analysis_age>13 & analysis_age<27
        stcurv, survival at(african=0 war2=0 war1=0 score=2 year=2008) at(african=3.75 war2=0 war1=0 score=2 year=2008) at(african=7.5 war2=0 war1=0 score=2 year=2008) yscale(range(0 1)) ylab(0(.2)1, nogrid) ytitle("Share never engaged in transactional sex") xlab(14(4)26) xtitle("Analysis age") title("") legend(subtitle("UNMIL uniformed personnel from African countries:", tstyle(body)) rows(1) order(1 "0" 2 "3,750 (actual in 2008)" 3 "7,500 (max)") region(lcolor(white)) ring(0) position(6)) scheme(s2mono) graphregion(color(white)) bgcolor(white) outfile(`stcurv2')
        use `stcurv2', clear
        list
    restore

* Table 9
    * poor performance on cognitive tests could be associated with poor recall (perhaps with women falsely associating UNPK arrival with their first transactional sex)
    * as a robustness check, run on subsample of women who received perfect score on cognitive tests
    xi: logit fail total war2 war1 score year i.analysis_age if score==3, or
    xi: logit fail african war2 war1 score year i.analysis_age if score==3, or
    char analysis_age[omit]
    xi: streg total war2 war1 score year if score==3, dist(wei) nolog
    xi: streg african war2 war1 score year if score==3, dist(wei) nolog

* Table 10
    * adjust for variation in age distribution across analysis years
    * since women interviewed in 2012 were ages 18-30, age distribution of risk set varies substantially over time
    * we can use household roster data to construct post-stratification weights for appropriate adjustment
    * weighting ensures that each calendar year's age distribution reflects our estimate of the true age distribution of women at that time rather than project-related attrition or sampling design

    * save current data
    tempfile expanded
    save `expanded'

    * use roster to construct full age distribution for 2012
    use HH_roster, clear
    * collect ages for all female household members
    tempfile ages
    forvalues i = 1/12 {
        preserve
        keep if r_sex`i'=="F" & r_age`i'!=.
        keep r_age`i'
        rename r_age`i' ages
        cap append using `ages'
        save `ages', replace
        restore
    }
    use `ages'
    * get raw age distribution
    gen obs=ages
    collapse (count) obs, by(ages)
    forvalues i = 0/99 {
        quietly count if ages==`i'
        if `r(N)'==0 {
            local N = _N
            local N = `N' + 1
            set obs `N'
            replace ages = `i' in `N'
        }
    }
    sort ages

    * given this age distribution, compute expected shares for each age group in each calendar year
    * in doing so, we allow variation in initial cohort size (e.g. due to fewer births in war years) and estimate survivorship rates non-parametrically, using splines
    * first, use splines to estimate survivorship non-parametrically
    * since each cohort must get smaller over time, we restrict the slope coefficients to be negative for all intervals
    mkspline agespline 6 = ages
    nl (obs = - exp({lna}) * agespline1 - exp({lnb}) * agespline2 - exp({lnc}) * agespline3 - exp({lnd}) * agespline4 - exp({lne}) * agespline5 - exp({lnf}) * agespline6 + {g}), nolog
    predict expsize
    twoway line expsize ages || scatter obs ages
    * note that the slope of the first interval is extremely close to zero (too close for the standard error to be reported), because we require negative slopes
    * given these estimated survivorship rates, allow variation in initial cohort size
    * in essence, we draw a version of the previously estimated line through each observation in order to get time-varying age group shares
    forvalues i = 0/99 {
        sum obs if ages==`i'
        if `r(sum)'>0 {
            local actual = `r(sum)'
            sum expsize if ages==`i'
            local predicted = `r(sum)'
            gen expsize`i' = expsize * (`actual' / `predicted')
        }
    }
    * example of survivorship for cohort aged 12 and cohort aged 21 at time of survey
    twoway line expsize12 ages || line expsize21 ages || scatter obs ages
    * for each calendar year, compute expected share of each yearly age group, which we can then use to post-stratify the survey data
    forvalues i = 1980/2012 {
        gen expsize`i'=.
        forvalues j = 0/99 {
            local k = `j' + (2012 - `i')
            * expected number of j-aged females in year i, given how many of these females we observed at age k in 2012
            cap replace expsize`i' = expsize`k' if ages==`j'
        }
        summ expsize`i'
        gen expshare`i' = expsize`i' / `r(sum)'
    }
    * example of expected age distributions for 1996, 2000, 2004, 2008, and 2012 (which is identical to the actual observed distribution)
    twoway line expsize2012 ages || line expsize2008 ages || line expsize2004 ages || line expsize2000 ages || line expsize1996 ages || scatter obs ages
    sort ages
    tempfile expshareswide
    save `expshareswide'

    * prepare for merge
    tempfile expshares
    keep ages expshare*
    reshape long expshare, i(ages) j(year)
    rename ages analysis_age
    sort analysis_age year
    save `expshares'
    * merge estimates into survey data
    use `expanded'
    sort analysis_age year
    merge analysis_age year using `expshares'
    drop if _merge==2
    drop _merge
    sort id_hh year

    * program to construct post-stratification weights for a given range of calendar years or ages
    * the program ensures that a particular age group is included only if it is available for all analysis years
    * for example, we would not want to include 14-year-olds in an analysis of the years 1996-2010, because we do not have subjects who were 14 in 2010
    cap program drop ps
    program define ps
        syntax [, Years(numlist integer >=1980 <=2012) Ages(numlist integer >=0 <=99)]
        * make sure either year or age range was specified
        if("`years'"=="" & "`ages'"=="") {
            di "Either years() or ages() must be specified as option"
            exit
        }
        if("`years'"!="" & "`ages'"!="") {
            di "Only one of years() or ages() must be specified as option"
            exit
        }
        * infer age range from range of calendar years, and vice versa
        tempvar incl maxagevar minagevar lastyearvar firstyearvar psw
        gen `incl'=0
        if("`years'"!="") {
            foreach i of numlist `years' {
                * infer age range from range of calendar years
                tempvar ydiff`i' yupper`i' ylower`i'
                gen `ydiff`i'' = 2012 - `i'
                gen `yupper`i'' = 30 - `ydiff`i''
                qui replace `yupper`i'' = 0 if `yupper`i''<0
                gen `ylower`i'' = 18 - `ydiff`i''
                qui replace `ylower`i'' = 0 if `ylower`i''<0
                local yupperlist "`yupperlist' `yupper`i''"
                local ylowerlist "`ylowerlist' `ylower`i''"
                * mark observations to be included
                qui replace `incl'=1 if year==`i'
            }
            egen `maxagevar' = rowmin(`yupperlist')
            local maxage = `maxagevar'[1]
            egen `minagevar' = rowmax(`ylowerlist')
            local minage = `minagevar'[1]
            numlist "`minage'/`maxage'"
            local ages = "`r(numlist)'"
            * unmark observations outside of maximal age range
            qui replace `incl'=0 if analysis_age>`maxagevar' | analysis_age<`minagevar'
        }
        else {
            foreach i of numlist `ages' {
                * infer range of calendar years from age range
                tempvar adiff`i' aupper`i' alower`i'
                gen `adiff`i'' = 2012 + `i'
                gen `aupper`i'' = `adiff`i'' - 18
                qui replace `aupper`i'' = 2012 if `aupper`i''>2012
                gen `alower`i'' = `adiff`i'' - 30
                qui replace `alower`i'' = 2012 if `alower`i''>2012
                local aupperlist "`aupperlist' `aupper`i''"
                local alowerlist "`alowerlist' `alower`i''"
                * mark observations to be included
                qui replace `incl'=1 if analysis_age==`i'
            }
            egen `lastyearvar' = rowmin(`aupperlist')
            local lastyear = `lastyearvar'[1]
            egen `firstyearvar' = rowmax(`alowerlist')
            local firstyear = `firstyearvar'[1]
            numlist "`firstyear'/`lastyear'"
            local years = "`r(numlist)'"
            * unmark observations outside of maximal range of calendar years
            qui replace `incl'=0 if year>`lastyearvar' | year<`firstyearvar'
        }
        * compute actual data counts and construct weights for each calendar year and age group
        qui gen `psw' = .
        foreach i of numlist `years' {
            qui summ analysis_age if year==`i' & `incl'==1
            local totalinyear = `r(N)'
            foreach j of numlist `ages' {
                qui count if analysis_age==`j' & year==`i' & `incl'==1
                local totalage = `r(N)'
                * weights are expected share / (number of obs for calendar year and age group / total number of obs in that year)
                qui replace `psw' = expshare / (`totalage' / `totalinyear') if analysis_age==`j' & year==`i' & `incl'==1
            }
            qui count if `incl'==1 & year==`i'
            local samplesize = `r(N)'
            qui summ `psw' if year==`i'
            qui replace `psw' = (`samplesize' / (`r(N)' * `r(mean)')) * `psw' if year==`i'
        }
        * ensure that each sampling unit is associated with a unique weight
        qui: bysort id_hh: egen psw=mean(`psw')
        qui replace psw=. if `psw'==.
        di "Range of calendar years: `years'"
        di "Age range: `ages'"
    end

    * an analysis with balanced age distributions across calendar years requires that we restrict the range of calendar years included in the analysis
    * for example, we could not include both 1990 and 2010 in such an analysis:
    * - our age distribution for 1990 has no support above age 8, because the oldest subjects to answer questions about transactional sex were 30 in 2012 and hence 8 in 1990,
    * - but age distribution for 2010 has no support below age 16, because the youngest subjects were 18 in 2012 and hence 16 in 2010

    * note also that the supported age range depends on the calendar years included in the analysis
    * for example, for 2002 to 2008, we have data for women aged 14-20 in a given year, because:
    * - women aged 20 in 2002 are 30 at the time of the survey in 2012, i.e. at the upper age limit for sensitive interviews
    * - women aged 14 in 2008 are 18 at the time of the survey in 2012, i.e. at the lower age limit for sensitive interviews

    * here, use the range of years that is optimal in terms of covering the largest total annual cases of women at risk, given that it:
    * - includes the year of UN deployment (2003) and the year right before and right after deployment,
    * - includes women aged 16-18 in the risk set, because they are particularly likely to engage in transactional sex for the first time
    * - does not include girls aged 13 or younger in the risk set, because they are relatively unlikely to engage in transactional sex
    * see the R replication code for computational details
    * the optimal interval given these criteria covers the years 2000 to 2008 and hence 14- to 18-year-olds

    * analysis with balanced age distributions
    preserve
        ps, y(2000/2008)
        drop if psw==.
        * discrete-time models
        xi: logit fail total war2 war1 score year i.analysis_age [pw=psw], or
        xi: logit fail african war2 war1 score year i.analysis_age [pw=psw], or
        * continuous-time models
        stset analysis_age [pw=psw], failure(fail) id(id_hh)
        xi: streg total war2 war1 score year, dist(wei) nolog
        xi: streg african war2 war1 score year, dist(wei) nolog
    restore

    * compare raw and weighted age distributions
    preserve
        ps, y(2000/2008)
        drop if psw==.
        * get unweighted distribution from survey data
        forvalues i = 2000/2008 {
            prop analysis_age if year==`i'
            matrix a = e(b)
            matrix a = a'
            svmat a, name(uwshares`i')
            rename uwshares`i'1 uwshares`i'
        }
        * get weighted distribution from survey data
        forvalues i = 2000/2008 {
            prop analysis_age [pw=psw] if year==`i'
            matrix a = e(b)
            matrix a = a'
            svmat a, name(pswshares`i')
            rename pswshares`i'1 pswshares`i'
        }
        gen ages=14 in 1
        replace ages=15 in 2
        replace ages=16 in 3
        replace ages=17 in 4
        replace ages=18 in 5
        keep uwshares2000-ages
        keep in 1/5
        order ages
        sort ages
        * merge and rescale distributions estimated from roster data
        merge ages using `expshareswide'
        drop if _merge==2
        forvalues i = 2000/2008 {
            qui summ expshare`i'
            qui replace expshare`i' = expshare`i' / `r(sum)'
        }
        * compare distributions graphically
        forvalues i = 2000/2008 {
            twoway line expshare`i' ages || line uwshares`i' ages || line pswshares`i' ages
            graph copy comparison`i', replace
        }
        graph combine comparison2000 comparison2001 comparison2002 comparison2003 comparison2004 comparison2005 comparison2006 comparison2007 comparison2008, ycommon xcommon altshrink
        * plot difference between distributions
        forvalues i = 2000/2008 {
            gen diffpswexp`i' = abs(pswshares`i' - expshare`i')
            gen diffuwexp`i' = abs(uwshares`i' - expshare`i')
            twoway line diffpswexp`i' ages || line diffuwexp`i' ages
            graph copy difference`i', replace
        }
        graph combine difference2000 difference2001 difference2002 difference2003 difference2004 difference2005 difference2006 difference2007 difference2008, ycommon xcommon altshrink
    restore

    * restore data
    use `expanded', clear

* see R code for Figures 1 and 3

* Supplementary materials
* Table S.1
    preserve
        use Female18-30.temp, clear
        for any cats1 cats2: gen X=0 if tsUN=="Yes"
        replace cats1=1 if tsUN=="No" & tsany==1
        replace cats2=1 if tsany==0
        gen varname=""
        foreach i in varmin varmax meanUN meanNoUN meanNo diff pval {
            qui gen `i' = .
            format `i' %04.2f
        }
        local count = 1
        quietly {
            foreach var of varlist age christian married score PKsold PKsocial PKspoke PKproblem PKsafety PKownsafety PKpositive PKsolve PKnegative PKstay owner businesses sellforeign lnearnings employee jobs lnwages savings lnassets enoughfood enoughhousing enoughhealth enoughschool electricity water toilet deaths numdeaths headdeath disabled confiscation {
                summ `var'
                replace varmin = `r(min)' in `count'
                replace varmax = `r(max)' in `count'
                ttest `var', by(cats1)
                replace varname = "`var'" in `count'
                replace meanUN = `r(mu_1)' in `count'
                replace meanNoUN = `r(mu_2)' in `count'
                replace diff = `r(mu_1)' - `r(mu_2)' in `count'
                replace pval = `r(p)' in `count'
                local count = `count' + 1
                qui ttest `var', by(cats2)
                replace meanNo = `r(mu_2)' in `count'
                replace diff = `r(mu_1)' - `r(mu_2)' in `count'
                replace pval = `r(p)' in `count'
                local count = `count' + 1
            }
        }
        list varname varmin varmax meanUN meanNoUN meanNo diff pval if pval!=.
    restore

* Table S.2
    * other robustness checks
    * control for age at survey instead of linear time trend
    xi: streg total war2 war1 score age_at_survey, dist(wei) nolog
    xi: streg african war2 war1 score age_at_survey, dist(wei) nolog
    * note that age at survey and calendar year are exactly interchangeable controls in our discrete-time specifications
    char analysis_age[omit] 13
    xi: logit fail total war2 war1 score age_at_survey i.analysis_age, or
    xi: logit fail african war2 war1 score age_at_survey i.analysis_age, or

    * cohort dummies instead of linear time trend
    xi: streg total war2 war1 score i.age_at_survey, dist(wei) nolog
    xi: streg african war2 war1 score i.age_at_survey, dist(wei) nolog

    * estimate log-logistic instead of Weibull to allow non-monotonic hazard function
    * to facilitate comparisons to other specifications, we report odds ratios instead of coefficients
    * program to convert coefficient in odds ratio and appropriate standard error
    cap program drop or
    program define or
        syntax varlist
        preserve
        qui set obs 100000
        foreach i of varlist `varlist' {
            gen _`i'_csim = rnormal(_b[`i'], _se[`i'])
            * convert simulated coefficients into odds ratios
            gen _`i'_osim = exp(-_`i'_csim * (1/e(gamma)))
            * get standard error
            qui summ _`i'_osim
            local se = `r(sd)'
            * get odds ratio
            local or = exp(-_b[`i'] * (1/e(gamma)))
            * report results
            di "`i': Odds ratio " %-04.3f `or' ", with standard error " %-04.3f `se'
        }
    end
    * run estimation
    * ancillary parameter (gamma<1) suggests hazard that is initially increasing and later decreasing in analysis age
    set seed 7643943
    xi: streg total war2 war1 score year, dist(logl) nolog
    or total war2 war1 score year
    xi: streg african war2 war1 score year, dist(logl) nolog
    or african war2 war1 score year

